Package load:
library(tidyverse)
library(readxl)
library(plotly)
library(shiny)
library(gganimate)
library(gifski)
library(av)
In this project, we will build our own version of the Gapminder dataset. The goal is to create a chart replicating the visualization from Hans Rosling’s "200 Countries, 200 Years, 4 Minutes". You can watch the video at this link.
Additional note: This was a capstone project from the course of Business Intelligence and Data Analytics at the University of Victoria.
The data files for the project can be found at this link, where all of the Gapminder datasets are available.
The datasets for life expectancy, income, population, and data geographies need to be downloaded from the Gapminder website and imported into the environment. The CSV files can be read with the read_csv function, and the Excel workbook of geographies with read_excel.
# Read the three Gapminder CSV exports. As the echoed messages below show, each
# file has one character column (`country`) plus one numeric column per year.
# The "##" lines are the console messages printed when each file was read.
life_expectancy <- read_csv(file = "data/life_expectancy_years.csv")
## Rows: 187 Columns: 220
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (219): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
income <- read_csv(
  file = "data/income_per_person_gdppercapita_ppp_inflation_adjusted.csv"
)
## Rows: 193 Columns: 242
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (241): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
population <- read_csv(file = "data/population_total.csv")
## Rows: 195 Columns: 302
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (301): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data_geo <- read_excel("data/Data Geographies - v1 - by Gapminder.xlsx", sheet = "list-of-countries-etc")
Now the tables need to be reshaped from a wide format to a long format, so the data is tidy and easier to work with.
## Pivot longer for life expectancy, income, and population.
## Each wide table (one column per year) becomes one row per country and year.
## `year` is built from the column names, so it is a character column here.

## Life expectancy
life_expectancy_pivot <- life_expectancy %>%
  pivot_longer(-country, names_to = "year", values_to = "life_expectancy")

## Income
## Keep only years before 2019. `year` is character, so it is converted before
## the comparison; comparing the raw strings with a number would be a text
## (lexicographic) comparison rather than a numeric one.
income_pivot <- income %>%
  pivot_longer(-country, names_to = "year", values_to = "income") %>%
  filter(as.integer(year) < 2019)

## Population
population_pivot <- population %>%
  pivot_longer(-country, names_to = "year", values_to = "population")
## Keep only the columns of data_geo needed for the analysis: the country
## name and its region, exposing the region under the name "Continent".
data_geo_2 <- select(data_geo, name, Continent = four_regions)
We need to combine the tables into one, so after we can create the charts.
## Join the long tables on country and year. Life expectancy is the left-hand
## table, so every life-expectancy row is kept by the left joins.
table_1 <- left_join(life_expectancy_pivot, income_pivot,
                     by = c("country", "year"))
## Then join the population table
table_2 <- left_join(table_1, population_pivot,
                     by = c("country", "year"))
## Finally add the geographies data (the country name is in `name` there)
table_3 <- left_join(table_2, data_geo_2,
                     by = c("country" = "name"))
## Divide the population by a million so it is easier to make the charts, and
## add a `text` column (country, population, life expectancy, income) that can
## be shown as the tooltip of an interactive chart.
## paste0() is used because every label already ends with its own space;
## paste(sep = " ") doubled the space after each colon and left a trailing
## space before each line break.
table_4 <- table_3 %>%
  mutate(
    population = population / 1000000,
    text = paste0(
      "Country: ", country,
      "\nPopulation: ", population,
      "\nLife Expectancy: ", life_expectancy,
      "\nIncome: ", income
    )
  ) %>%
  arrange(population)
table_4
## # A tibble: 40,953 × 7
## country year life_expectancy income population Continent text
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 Brunei 1800 29.2 1510 0.00213 asia "Country: Brunei …
## 2 Brunei 1801 29.2 1510 0.00213 asia "Country: Brunei …
## 3 Brunei 1802 29.2 1510 0.00213 asia "Country: Brunei …
## 4 Brunei 1803 29.2 1510 0.00213 asia "Country: Brunei …
## 5 Brunei 1804 29.2 1510 0.00213 asia "Country: Brunei …
## 6 Brunei 1805 29.2 1510 0.00213 asia "Country: Brunei …
## 7 Brunei 1806 29.2 1510 0.00213 asia "Country: Brunei …
## 8 Brunei 1807 29.2 1510 0.00213 asia "Country: Brunei …
## 9 Brunei 1808 29.2 1510 0.00213 asia "Country: Brunei …
## 10 Brunei 1809 29.2 1510 0.00213 asia "Country: Brunei …
## # … with 40,943 more rows
If a chart for a single year is needed, showing life expectancy, income, and population together, the following code creates it.
## STATIC bubble chart 1998
## One point per row of table_4 for the year 1998: income on x, life
## expectancy on y, point size by population, colour by Continent. The `text`
## aesthetic is mapped here so that ggplotly(tooltip = "text") can use it.
bubble_1998 <- ggplot(
  data = filter(table_4, year == 1998),
  mapping = aes(
    x = income,
    y = life_expectancy,
    size = population,
    color = Continent,
    text = text
  )
) +
  geom_point(alpha = 0.7) +
  scale_size(name = "Population", range = c(1.4, 10)) +
  labs(
    title = "Life Expectancy and Income by country (year 1998)",
    subtitle = "Population expressed in millions",
    caption = "Data source: Gapminder",
    x = "Income",
    y = "Life Expectancy"
  ) +
  # Centre the title and subtitle above the panel.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
## Plot
bubble_1998
As an additional exercise, the plot can be made interactive using the ggplotly function. The reference for this exercise was: https://r-graph-gallery.com/bubble_chart_interactive_ggplotly.html
This particular exercise takes the year 1998 as reference.
## INTERACTIVE bubble chart
## Convert the static ggplot into a plotly object whose tooltip shows the
## `text` aesthetic, then set an HTML title: the main title, a line break, and
## "Population in millions" in superscript-sized text.
interactive_1998 <- layout(
  ggplotly(bubble_1998, tooltip = "text"),
  title = list(
    text = paste0(
      "Life Expectancy and Income by country (year 1998)",
      "<br>",
      "<sup>",
      "Population in millions",
      "</sup>"
    )
  )
)
interactive_1998
To get closer to Hans Rosling’s animated plot, gganimate can be used to show how the variables (income, life expectancy, and population) change over the years.
## Year needs to be converted to an integer before it is used in
## transition_time()
table_4 <- table_4 %>%
  mutate(year = as.integer(as.character(year)))

## Animated bubble chart: income (log10 axis) against life expectancy, point
## size by population and colour by Continent. Rows with any missing value are
## dropped first.
animation <- table_4 %>%
  na.omit() %>%
  ggplot(
    aes(
      x = income,
      y = life_expectancy,
      size = population,
      color = Continent
    )
  ) +
  geom_point() +
  theme_bw() +
  scale_x_log10() +
  # gganimate specific bits: {frame_time} in the title is filled in per frame
  labs(
    title = 'Life Expectancy and Income by Country \nYear: {frame_time}',
    subtitle = "Population in Millions",
    x = 'Income',
    y = 'life expectancy'
  ) +
  transition_time(year)

## The GIF should play more slowly; this is set with the animate function
animate(
  animation,
  duration = 30
)
## References:
## https://r-graph-gallery.com/271-ggplot2-animated-gif-chart-with-gganimate.html
## https://ugoproto.github.io/ugo_r_doc/pdf/gganimate.pdf